### Replication file for
# Title: From Foe to Friend? Government-Opposition Conflict and the Appointment of Cabinet Ministers
# Authors: Herzog, Alexander (alexander.herzog@uni-bamberg.de ; University of Bamberg); Schmuck, David (david.schmuck@uni-bamberg.de ; University of Bamberg , corresponding author)
# Journal: Political Science Research and Methods

# APPENDIX D LLM Validation

# Pre-Settings ----
# Clear environment (including hidden objects) so that results do not depend
# on objects left over from an earlier session
rm(list = ls(all.names = TRUE))

# Set working directory
# Note: Set `work_dir` to the folder that contains the input file; the output
# tables and the figure are written there as well. When `work_dir` is left
# empty the current working directory is kept, because setwd("") would stop
# the script with "cannot change working directory".
work_dir <- ""
if (nzchar(work_dir)) {
  setwd(work_dir)
}

# Load packages (they must already be installed)
library(tidyverse) # for data manipulation
library(kableExtra) # to save kable-generated tables

# Load data file
# The sections below use the object `gpt_comparison`, which is expected to be
# provided by this file.
data_file <- "Replication data - PSRM - Herzog Schmuck - From Foe to Friend - Appendix D LLM Validation.RData"
if (!file.exists(data_file)) {
  stop(
    "Input file not found in '", getwd(), "': ", data_file,
    ". Set `work_dir` to the folder containing the replication data.",
    call. = FALSE
  )
}
load(data_file)

# Table D.1: Correlations between Wordfish estimates and GPT coding of speeches ----
# One row per selection_type with the Pearson and the Spearman correlation
# between wf_theta and gpt_coded_summaries.
# NOTE(review): cor() is called without `use =`, so any missing value in
# either column yields NA for that group - confirm the data has none.
tabD1 <- summarise(
  group_by(gpt_comparison, selection_type),
  corr_pearson_w_desc = cor(wf_theta, gpt_coded_summaries, method = "pearson"),
  corr_spearman_w_desc = cor(wf_theta, gpt_coded_summaries, method = "spearman")
)

# Show Table D.1 in the console and write it to an HTML file
print(tabD1)
tabD1 |>
  kable(format = "html") |>
  save_kable(file = "Herzog_Schmuck_TabD1_wordfish_gtp_correlations.html")


# Table D.2: F1 scores for dichotomized Wordfish estimates compared to dichotomized GPT coding of speeches. ----
# Cross-tabulation of the GPT and Wordfish binary codings, restricted to rows
# with selection_type "top" and a non-missing GPT coding. The object is kept
# in the workspace for inspection; it is not used for Table D.2 below.
confusion_matrix_binary_recoded <- gpt_comparison |>
  filter(!is.na(gpt_binary_recoded), selection_type == "top") |>
  select(gpt_binary_recoded, wf_binary) |>
  table()

# Precision, recall and F1 per selection_type. The GPT coding serves as the
# reference and the Wordfish coding as the prediction; "opposition" is the
# positive class. F1 is derived from the unrounded precision and recall, and
# all three metrics are rounded to three decimals only at the very end.
tabD2 <- gpt_comparison |>
  filter(!is.na(gpt_binary_recoded)) |>
  group_by(selection_type) |>
  summarise(
    true_pos = sum(
      gpt_binary_recoded == "GPT opposition" & wf_binary == "WF opposition"
    ),
    true_neg = sum(
      gpt_binary_recoded == "GPT government" & wf_binary == "WF government"
    ),
    false_pos = sum(
      gpt_binary_recoded == "GPT government" & wf_binary == "WF opposition"
    ),
    false_neg = sum(
      gpt_binary_recoded == "GPT opposition" & wf_binary == "WF government"
    ),
    precision = true_pos / (true_pos + false_pos),
    recall = true_pos / (true_pos + false_neg),
    f1 = 2 * precision * recall / (precision + recall),
    .groups = "drop"
  ) |>
  # drop the raw cell counts; only the three metrics are reported
  select(-c(true_pos, true_neg, false_pos, false_neg)) |>
  mutate(across(precision:f1, \(x) round(x, 3)))

# Show Table D.2 in the console and write it to an HTML file
print(tabD2)
tabD2 |>
  kable(format = "html") |>
  save_kable(file = "Herzog_Schmuck_TabD2_precision_recall_f1.html")


# Figure D.4: Distribution of Wordfish estimates by categories from GPT coding of speech summaries. ----
# Violin plots with narrow box plots of wf_theta for every value of
# gpt_coded_summaries (treated as a discrete factor), in one panel per
# selection_type. The median of each category is marked by a point and the
# medians are connected by a dashed line.
figD4 <- ggplot(
  data = gpt_comparison,
  aes(x = factor(gpt_coded_summaries), y = wf_theta)
) +
  theme_bw(base_size = 20) +
  facet_wrap(~ selection_type,
             labeller = labeller(selection_type = c(
               "top" = "Highest-loading debates",
               "bottom" = "Lowest-loading debates"
             ))) +
  geom_violin(width = 1) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.2) +
  # point for the median of each category (a second, identical point layer
  # that was drawn underneath at the same positions has been removed)
  stat_summary(fun = median, geom = "point", size = 2, color = "black") +
  # group = 1 puts all categories into one group so that the line connects
  # the medians across the x axis
  stat_summary(aes(group = 1), fun = median, geom = "line", linetype = "dashed") +
  labs(x = "GPT coded speech summaries", y = "Wordfish estimates", title = "")

# Plot the graph
plot(figD4)

# Save plot
# `plot = figD4` is passed explicitly: without it ggsave() writes whatever
# plot was displayed last, which is only figD4 if nothing else was drawn
# in between.
ggsave(
  filename = "Herzog_Schmuck_FigD4_Wordfish_vs_GPT.tiff",
  plot = figD4,
  device = "tiff",
  dpi = 200,
  width = 18,
  height = 10,
  units = "in",
  compression = "lzw"
)
